library(bigMap)

Run pt-SNE on Sierpinski-3d with perplexity 2030 (99% of the data set size) and a decreasing range of thread-ratio. We show:

# source aux. stuff (graph plot function)
source('../graphs.R')
# load edge matrix
E <- as.matrix(read.csv('../sierpinski3d.edg', sep = '', header = F))

Load data

D <- as.matrix(read.csv('../sierpinski3d.mtx', sep = '', header = F))

Compute betas

g <- bdm.init(D, dSet.name = 'S3D', is.distance = T, ppx = 2030, threads = 4)

Run ptSNE (perplexity = 2030, decreasing thread-ratio)

g1 <- bdm.ptsne(D, g, threads = 1, layers = 1)
g2 <- bdm.ptsne(D, g, threads = 3, layers = 2)
g3 <- bdm.ptsne(D, g, threads = 4, layers = 2)
g4 <- bdm.ptsne(D, g, threads = 5, layers = 2)
g5 <- bdm.ptsne(D, g, threads = 6, layers = 2)
g6 <- bdm.ptsne(D, g, threads = 8, layers = 2)
g.list <- list(g1, g2, g3, g4, g5, g6)
save(g.list, file = './s3d_2030.RData')

Output

nulL <- lapply(g.list, function(g) graph.plot(g, E))

hl-Correlation

g.list <- lapply(g.list, function(g) bdm.hlCorr(D, g, threads = 4))
save(g.list, file = './s3d_2030.RData')
hlTable <- sapply(g.list, function(g) summary(g$hlC))
hlTable <- t(round(hlTable, 4))
threadRatio <- sapply(g.list, function(g) g$ptsne$layers /g$ptsne$threads)
rownames(hlTable) <- round(threadRatio, 2)
knitr::kable(hlTable, caption = 'hl-Correlation by thread-ratio') %>%
  kable_styling(full_width = F)
hl-Correlation by thread-ratio
Min. 1st Qu. Median Mean 3rd Qu. Max.
1 0.7383 0.7493 0.7547 0.7522 0.7576 0.7609
0.67 0.7403 0.7459 0.7477 0.7496 0.7515 0.7626
0.5 0.7463 0.7464 0.7509 0.7513 0.7559 0.7574
0.4 0.7373 0.7444 0.7502 0.7497 0.7555 0.7612
0.33 0.7549 0.7569 0.7579 0.7590 0.7599 0.7654
0.25 0.7402 0.7435 0.7463 0.7464 0.7491 0.7527

k-ary neighborhood preservation

g.list <- lapply(g.list, function(g) bdm.knp(D, g, threads = 4))
save(g.list, file = './s3d_2030.RData')
bdm.knp.plot(g.list, ppxfrmt = 0)

Running Times

rTimes <- sapply(g.list, function(g) c(g$ppx$t[3], g$t$epoch, g$t$ptsne[3], sum(c(g$ppx$t[3], g$t$ptsne[3]))))
rTimes <- round(rTimes, 2)
threadRatio <- sapply(g.list, function(g) g$ptsne$layers /g$ptsne$threads)
colnames(rTimes) <- round(threadRatio, 2)
rownames(rTimes) <- c('betas', 'epoch', 'ptSNE', 'total')
knitr::kable(rTimes, caption = 'Computation times (s) by thread-ratio') %>%
  kable_styling(full_width = F)
Computation times (s) by thread-ratio
1 0.67 0.5 0.4 0.33 0.25
betas 0.38 0.38 0.38 0.38 0.38 0.38
epoch 1.89 0.86 0.54 0.48 0.40 0.29
ptSNE 62.85 27.84 17.01 15.12 12.41 9.02
total 63.22 28.21 17.39 15.50 12.79 9.39

Run on: Intel(R) Xeon(R) CPU E31225 @ 3.10GHz, 4 cores, 16GB RAM.

Embedding final compression (EFC)

# take the embedding with lowest thread-ratio
g.2030c <- g.list[6:6]
# compress with ppx = 1845 (0.90)
g.2030c <- bdm.efc(D, g.2030c, ppx = 1845, iters = 50, threads = 4)
# compress with ppx = 1025 (0.50)
g.2030c <- bdm.efc(D, g.2030c, ppx = 1025, iters = 50, threads = 4)
# compress with ppx = 205 (0.10)
g.2030c <- bdm.efc(D, g.2030c, ppx = 205, iters = 50, threads = 4)
# compress with ppx = 102 (0.05)
g.2030c <- bdm.efc(D, g.2030c, ppx = 102, iters = 50, threads = 4)
# compress with ppx = 20 (0.01)
g.2030c <- bdm.efc(D, g.2030c, ppx = 20, iters = 50, threads = 4)
save(g.2030c, file = './s3d_2030c.RData')

EFC output vs. output from Fig.1 (with thread-ratio=1.0)

# load embeddings with thread-ratio = 1.0 (from Fig.1)
load('../pt-SNE/glist.RData')
g.threadRatio1 <- g.list[c(13, 11, 7, 3, 2, 1)]
g <- g.threadRatio1[[1]]
graph.plot(g, E, title = 'ppx=2030, thread-ratio=1.0')
g <- g.2030c[[1]]
graph.plot(g, E, title = 'ppx=2030, thread-ratio=0.25')

nulL <- lapply(2:6, function(i) {
  g <- g.threadRatio1[[i]]
  graph.plot(g, E, title = paste('ppx=', g$ppx$ppx, ', thread-ratio=1.0', sep = ''))
  g <- g.2030c[[i]]
  graph.plot(g, E, title = paste('ppx=2030, thread-ratio=0.25, +efc.', g$ppx$ppx, sep = ''))
})

EFC hl-Correlation

g.2030c <- lapply(g.2030c, function(g) bdm.hlCorr(D, g, zSampleSize = 1000, threads = 4))
hlTable <- sapply(g.2030c, function(g) summary(g$hlC))
hlTable <- t(round(hlTable, 4))
efc.ppx <- paste('efc.', sapply(g.2030c, function(g) g$ppx$ppx), sep = '')
rownames(hlTable) <- efc.ppx
knitr::kable(hlTable, caption = 'hl-Correlation by EFC-perplexity') %>%
  kable_styling(full_width = F)
hl-Correlation by EFC-perplexity
Min. 1st Qu. Median Mean 3rd Qu. Max.
efc.2030 0.7513 0.7527 0.7544 0.7545 0.7561 0.7578
efc.1845 0.7342 0.7374 0.7418 0.7430 0.7474 0.7541
efc.1025 0.7215 0.7226 0.7264 0.7280 0.7318 0.7375
efc.205 0.7214 0.7273 0.7313 0.7301 0.7341 0.7364
efc.102 0.7162 0.7181 0.7199 0.7217 0.7235 0.7310
efc.20 0.7115 0.7159 0.7249 0.7244 0.7334 0.7366

EFC k-ary neighborhood preservation

g.2030c <- lapply(g.2030c, function(g) bdm.knp(D, g, k.max = NULL, sampling = 0.9, threads = 4))
bdm.knp.plot(g.2030c, ppxfrmt = 0)

Running Times

rTimes <- sapply(g.2030c, function(g) c(g$ppx$t[3], g$t$epoch, g$t$ptsne[3], g$t$efc[3], sum(c(g$ppx$t[3], g$t$ptsne[3], g$t$efc[3]))))
rTimes <- round(rTimes, 2)
colnames(rTimes) <- sapply(g.2030c, function(g) g$ppx$ppx)
rownames(rTimes) <- c('betas', 'epoch', 'ptSNE', 'EFC', 'total')
knitr::kable(rTimes, caption = 'Computation times (s) by EFC-perplexity') %>%
  kable_styling(full_width = F)
Computation times (s) by EFC-perplexity
2030 1845 1025 205 102 20
betas 0.38 0.45 0.56 0.34 0.14 0.11
epoch 0.29 0.29 0.29 0.29 0.29 0.29
ptSNE 9.02 9.02 9.02 9.02 9.02 9.02
EFC 0.00 19.76 19.23 18.83 19.15 19.19
total 9.39 29.23 28.81 28.19 28.31 28.32

Run on: Intel(R) Xeon(R) CPU E31225 @ 3.10GHz, 4 cores, 16GB RAM.